/*

	This file loads publicly available datasets (see Readme file)

*/

cd ${work}1_data

*** COUNTRY KEY
insheet using input\country_codes_V202102.csv , clear case
keep country_code iso_2digit_alpha iso_3digit_alpha
rename country_code code
rename iso_2digit_alpha land
rename iso_3digit_alpha iso3

*TAIWAN DOES NOT APPEAR IN TAIWAN, BUT (ACCORDING TO CEPII) "ASIA, NOT ELSEWHERE CLASSIFIED" (CODE 490) IS A GOOD PROXY FOR TAIWAN
replace land="TW"	if code==490
replace iso3="TWN"	if code==490

compress
save output\country_codes_baci, replace


*** BACI DATA FROM CEPII
local start	2002
local end	2015

foreach y of numlist `start'/`end' {
    
	insheet using input\BACI_HS92_Y`y'_V202001.csv, clear case
	keep i j k t v
	save temp\baci`y', replace	
	
}

clear
local command use
foreach y of numlist `start'/`end' {
    `command' temp\baci`y'
	erase temp\baci`y'.dta
	local command "append using"
}

compress
save output\baci, replace


*** COUNTRY INCOME GROUPS FROM THE WORLD BANK
use land iso3 using output\country_codes_baci , clear
duplicates drop
save temp\temp, replace

import excel using "input\WITSCountryProfile-Country_Indicator_ProductMetada-en.xlsx" , clear firstrow case(lower)
keep countryiso3 incomegroup
rename countryiso3 iso3
rename incomegroup income_group

drop if income_group=="Others"
replace income_group="High income" if income_group=="High income: OECD" | income_group=="High income: nonOECD"

replace iso3="ROU" if iso3=="ROM" //to ensure match

*ADD TAIWAN MANUALLY AS HIGH INCOME COUNTRY
count
local N=r(N)+1
set obs `N'
replace iso3="TWN" if _n==_N
replace income_group="High income" if _n==_N

*MERGE ON 2-LETTER COUNTRY CODES
merge 1:1 iso3 using temp\temp , keep(3) nogen
drop iso3
erase temp\temp.dta

compress
save output\income_d, replace


*** INDUSTRYKEY FROM DST (from DB07 to standard group 69)
import excel using "input\Dansk-Branchekode-2007-(DB07)-v3-2014.xlsx" , clear firstrow case(lower)
keep branchekode grp127kode
rename branchekode db07
rename grp127kode g127
destring db07, replace

gen		g69 = g127
replace g69 = 6090	 if g127 >= 6000 & g127 <= 9000
replace g69 = 10120	 if g127 >= 10001 & g127 <= 12000
replace g69 = 13150	 if g127 >= 13000 & g127 <= 15000
replace g69 = 20000	 if g127 >= 20001 & g127 <= 20002
replace g69 = 23000	 if g127 >= 23001 & g127 <= 23002
replace g69 = 26000	 if g127 >= 26001 & g127 <= 26002
replace g69 = 27000	 if g127 >= 27001 & g127 <= 27003
replace g69 = 28000	 if g127 >= 28001 & g127 <= 28002
replace g69 = 31320	 if g127 >= 31000 & g127 <= 32002

keep db07 g69
compress
save output\industrykey, replace


*** DEFLATION INDICES FOR MATERIAL EXPENDITURES AND CAPITAL STOCK, RESPECTIVELY
*1966-2015, base year is 2010
foreach x in NABP69 NABK69 {	

	import excel using input/`x'.xlsx, firstrow sheet("Output") clear
	gen g69 = substr(A,1,5)
	drop if real(g69)==.
	destring g69, replace
	keep g69 AE-AY		//AE is 1995
	order g69 AE-AY

	local y=1995
	foreach v of varlist AE-AY {
		preserve
			rename `v' `x'
			keep g69 `x'
			gen year=`y'
			if "`x'"=="NABP69" save output/MPI_`y', replace
			if "`x'"=="NABK69" save output/GFCF_`y', replace
		restore
		local y=`y'+1
		}
}	

*** DEFLATION INDEX FOR WAGE BILL
*1980-2015, base year is 2015, but rescale to 2010 to match MPI and GFCF
import excel using input\PRIS112.xlsx, sheet("Output") clear
keep Q-AK
destring _all, replace
xpose, clear
rename v1 year
rename v2 CPI
local base=CPI[16]
replace CPI=CPI/`base'
save output\CPI, replace


*** BEC PRODUCT GROUPS
import excel using input\CN_BEC_2021, clear firstrow
keep if real(CN)!=. & real(BEC)!=.
drop if real(substr(START,-4,4))>2015 | real(substr(END,-4,4))<2002
keep CN BEC
duplicates drop
duplicates report CN //some have two, use latest:
bysort CN : keep if _n==_N
compress
save output\bec_p, replace